In [2]:
%matplotlib inline
from matplotlib import pyplot as plt
import matplotlib.mlab as mlab
import csv
from scipy.stats import norm
import numpy as np
import scipy.stats as stats
In [3]:
data = open('../data/data.csv', 'r').readlines()
# column order in the CSV: x, y, z, unmasked, synapses
fieldnames = ['x', 'y', 'z', 'unmasked', 'synapses']
reader = csv.reader(data)
reader.next()  # skip the header row
rows = [[int(col) for col in row] for row in reader]
In [4]:
#Sorted lists of the unique y and z values (the layer coordinates)
sorted_y = sorted(list(set([r[1] for r in rows])))
sorted_z = sorted(list(set([r[2] for r in rows])))
# y vals = [1369, 1408, 1447, 1486, 1525, 1564, 1603, 1642, 1681,
#1720, 1759, 1798, 1837, 1876, 1915, 1954, 1993, 2032,
#2071, 2110, 2149, 2188, 2227, 2266, 2305, 2344, 2383,
#2422, 2461, 2500, 2539, 2578, 2617, 2656, 2695, 2734,
#2773, 2812, 2851, 2890, 2929, 2968, 3007, 3046, 3085,
#3124, 3163, 3202, 3241, 3280, 3319, 3358]
In [48]:
#Can we plot histograms representing the distribution of synapses in each layer in the Y direction?
#What can we infer?
for i in sorted_y:
    # keep only bins in this y layer with nonzero unmasked and synapse counts
    unmaskedSynapsesNoZero = [r[-1] for r in rows if r[-2] != 0 and r[-1] != 0 and r[1] == i]
    mean = np.mean(unmaskedSynapsesNoZero)
    variance = np.var(unmaskedSynapsesNoZero)
    plt.hist(unmaskedSynapsesNoZero, bins=50)
    plt.title("Layer " + str(i))
    plt.show()
    print "Layer " + str(i) + " has mean " + str(mean) + " and variance " + str(variance)
#Synapse density and its spread decrease sharply in the deeper layers
In [49]:
#Can we fit a gaussian distribution to each layer?
for i in sorted_y:
    unmaskedSynapsesNoZero = [r[-1] for r in rows if r[-2] != 0 and r[-1] != 0 and r[1] == i]
    # best fit of data
    (mu, sigma) = norm.fit(unmaskedSynapsesNoZero)
    # the histogram of the data
    n, bins, patches = plt.hist(unmaskedSynapsesNoZero, 60, normed=1, facecolor='green', alpha=0.75)
    # add a 'best fit' line
    y = mlab.normpdf(bins, mu, sigma)
    l = plt.plot(bins, y, 'r--', linewidth=2)
    #plot
    plt.xlabel("Layer " + str(i))
    plt.ylabel('Probability')
    plt.title(r'$\mathrm{Histogram\ of\ Layer\ %.f}\ \mu=%.3f,\ \sigma=%.3f$' % (i, mu, sigma))
    plt.grid(True)
    plt.show()
#Gaussian does not quite fit
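In [ ]:
#To put a number on "does not quite fit", one option (a sketch, not part of the original
#analysis) is a one-sample Kolmogorov-Smirnov test of each layer against its fitted normal;
#small p-values indicate the Gaussian describes that layer poorly.
for i in sorted_y:
    vals = [r[-1] for r in rows if r[-2] != 0 and r[-1] != 0 and r[1] == i]
    mu, sigma = norm.fit(vals)
    D, p = stats.kstest(vals, 'norm', args=(mu, sigma))
    print "Layer " + str(i) + ": KS statistic = " + str(D) + ", p-value = " + str(p)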
In [50]:
from sklearn import mixture
def fit_samples(samples):
    # fit a 2-component Gaussian mixture to 2-D samples and color each point by its component
    gmix = mixture.GMM(n_components=2, covariance_type='full')
    gmix.fit(samples)
    print gmix.means_
    colors = ['r' if i == 0 else 'g' for i in gmix.predict(samples)]
    ax = plt.gca()
    ax.scatter(samples[:, 0], samples[:, 1], c=colors, alpha=0.8)
    plt.show()
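In [ ]:
#fit_samples is defined above but never called in this notebook. A minimal usage sketch,
#assuming we want to see whether (y coordinate, synapse density) pairs separate into two
#groups; this feature choice is illustrative only. Note that mixture.GMM is deprecated in
#newer scikit-learn in favor of mixture.GaussianMixture, which keeps the fit/predict API.
samples = np.array([[r[1], float(r[-1]) / r[-2]] for r in rows
                    if r[-2] != 0 and r[-1] != 0])
fit_samples(samples)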
In [53]:
#Can we fit a gamma distribution to each layer?
#The gamma does not quite fit either
for i in sorted_y:
    unmaskedSynapsesNoZero = [r[-1] for r in rows if r[-2] != 0 and r[-1] != 0 and r[1] == i]
    alpha, loc, beta = stats.gamma.fit(unmaskedSynapsesNoZero)
    rv = stats.gamma(alpha, loc, beta)
    h = plt.hist(unmaskedSynapsesNoZero, normed=True, bins=50)
    x = np.linspace(0, 400)
    plt.plot(x, rv.pdf(x), lw=2)
    plt.show()
In [54]:
#Can we plot the entire data set in 3D to look for other structure?
#The plot is too dense to read as-is and needs adjustment
from mpl_toolkits.mplot3d import Axes3D
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
x = []
y = []
z = []
for r in rows:
    if r[-2] != 0 and r[-1] != 0:
        x.append(r[0])
        y.append(r[1])
        z.append(r[2])
ax.scatter(x, y, z, c='b')
plt.show()
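In [ ]:
#One possible adjustment, sketched here rather than taken from the original analysis: thin
#the point cloud and shrink the markers so structure becomes visible. The subsampling step
#of 20 and the marker settings are arbitrary choices.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
sub = [r for r in rows if r[-2] != 0 and r[-1] != 0][::20]
ax.scatter([r[0] for r in sub], [r[1] for r in sub], [r[2] for r in sub],
           c='b', s=2, alpha=0.3)
plt.show()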
In [20]:
#Until now we have not taken the unmasked column into account.
#As I understand it, unmasked is the number of 'unmasked' voxels in each super voxel,
#i.e. the voxels that were actually examined; the remaining voxels were masked out because
#they were deemed not to contain important information.
#So a better estimate of synapse density per super voxel is the synapse count divided by
#the unmasked count.
#Can we fit a Gaussian to this distribution?
s = []
for r in rows:
    if r[-2] != 0 and r[-1] != 0:
        s.append(float(r[-1]) / float(r[-2]))
#GAUSSIAN
# best fit of the data to a normal curve (gaussian)
(mu, sigma) = norm.fit(s)
# the histogram of the data
n, bins, patches = plt.hist(s, 60, normed=1, facecolor='green', alpha=0.75)
# add a 'best fit' line
y = mlab.normpdf(bins, mu, sigma)
l = plt.plot(bins, y, 'r--', linewidth=2)
plt.show()
In [23]:
from lmfit.models import SkewedGaussianModel
s = []
for r in rows:
    if r[-2] != 0 and r[-1] != 0:
        s.append(float(r[-1]) / float(r[-2]))
#GAUSSIAN
# best fit of the data to a normal curve (gaussian)
(mu, sigma) = norm.fit(s)
# the histogram of the data
n, bins, patches = plt.hist(s, 60, normed=1, facecolor='green', alpha=0.75)
model = SkewedGaussianModel()
# set initial parameter values
params = model.make_params(amplitude=10, center=0, sigma=1, gamma=0)
# adjust parameters to best fit the data, using the left bin edges as x positions
result = model.fit(n, params, x=bins[:60])
print(result.fit_report())
plt.plot(bins[:60], result.best_fit, 'b--', linewidth=2)
# add a 'best fit' line
y = mlab.normpdf( bins, mu, sigma)
l = plt.plot(bins, y, 'r--', linewidth=2)
plt.xticks(np.arange(0, .004, .001))
plt.yticks(np.arange(0, 1200, 200))
plt.title("Distribution Of Synapes Density Accounting for Unmasked",fontsize =20 )
plt.xlabel("Synapse Density", fontsize =20)
plt.ylabel("Count",fontsize =20)
plt.legend(['Fitted SkewedGaussian', 'Fitted Gaussian'], loc='upper right')
plt.show()
In [28]:
#Take unmasked into account for each layer
#Both the skewed Gaussian and the plain Gaussian give somewhat nice curves
from lmfit.models import SkewedGaussianModel
import pylab
for i in sorted_y:
    s = []
    for r in rows:
        if r[-2] != 0 and r[-1] != 0 and r[1] == i:
            s.append(float(r[-1]) / float(r[-2]))
    # the histogram of the data
    n, bins, patches = plt.hist(s, 60, normed=1, facecolor='green', alpha=0.75)
    model = SkewedGaussianModel()
    params = model.make_params(amplitude=10, center=0, sigma=1, gamma=0)
    # adjust parameters to best fit the data
    result = model.fit(n, params, x=bins[:60])
    #print(result.fit_report())
    plt.plot(bins[:60], result.best_fit, 'b--', linewidth=2)
    #GAUSSIAN
    # best fit of the data to a normal curve (gaussian)
    (mu, sigma) = norm.fit(s)
    # add a 'best fit' line
    y = mlab.normpdf(bins, mu, sigma)
    l = plt.plot(bins, y, 'r--', linewidth=2)
    plt.xticks(np.arange(0, .0035, .001))
    plt.yticks(np.arange(0, 1600, 200))
    plt.xlim([0, .0035])
    plt.ylim([0, 1600])
    plt.suptitle("Distribution Of Density in Y Layer: " + str(i), fontsize=20)
    plt.xlabel("Synapse Density", fontsize=20)
    plt.ylabel("Density", fontsize=20)
    plt.legend(['Fitted SkewedGaussian', 'Fitted Gaussian'], loc='upper right', bbox_to_anchor=(1.3, 0.5))
    pylab.savefig('Y' + str(i) + '.png')
    plt.show()
In [57]:
#Does a colored scatter plot based on synapse concentration give us any more insight?
import matplotlib
allVal = []
for r in rows:
    if r[-2] != 0 and r[-1] != 0:
        allVal.append(r[-1])
# named color_norm so it does not shadow the scipy.stats norm imported above
color_norm = matplotlib.colors.Normalize(vmin=np.min(allVal), vmax=np.max(allVal), clip=False)
for i in sorted_y:
    x = []
    z = []
    val = []
    for r in rows:
        if r[-2] != 0 and r[-1] != 0 and r[1] == i:
            x.append(r[0])
            z.append(r[2])
            val.append(r[-1])
    plt.scatter(x, z, s=50, c=val, norm=color_norm)
    plt.show()
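In [ ]:
#Small follow-up sketch, not from the original analysis: repeat one of the per-layer scatters
#with a colorbar and a layer label so the shared color scale is actually visible. Only the
#first y layer is shown, and the normalization is recomputed here to keep the cell
#self-contained.
vals_all = [r[-1] for r in rows if r[-2] != 0 and r[-1] != 0]
cnorm = matplotlib.colors.Normalize(vmin=min(vals_all), vmax=max(vals_all))
i = sorted_y[0]
pts = [r for r in rows if r[-2] != 0 and r[-1] != 0 and r[1] == i]
sc = plt.scatter([r[0] for r in pts], [r[2] for r in pts], s=50,
                 c=[r[-1] for r in pts], norm=cnorm)
plt.colorbar(sc)
plt.title("Y layer " + str(i))
plt.show()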
In [59]:
#This is broken!! pcolormesh expects 2-D value arrays; an attempt to fix the color map is in the full volume notebook
x = []
z = []
val = []
for r in rows:
    if r[-2] != 0 and r[-1] != 0 and r[1] == 1369:
        x.append(r[0])
        z.append(r[2])
        val.append(r[-1])
val = np.array(val)
x = np.array(x)
z = np.array(z)
plt.pcolormesh(x, z, val)
plt.colorbar()
plt.show()
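In [ ]:
#A minimal sketch of one way to repair the plot above, under the assumption that the x and z
#coordinates lie on a regular grid (the fixed bin spacing in this data set suggests they do):
#pivot the per-bin synapse counts of this layer into a 2-D array and display it with imshow.
#Bins that were filtered out above stay at zero.
xs = np.unique(x)
zs = np.unique(z)
grid = np.zeros((len(zs), len(xs)))
grid[np.searchsorted(zs, z), np.searchsorted(xs, x)] = val
plt.imshow(grid, origin='lower', interpolation='nearest', aspect='auto',
           extent=[xs.min(), xs.max(), zs.min(), zs.max()])
plt.colorbar()
plt.show()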
In [60]:
#This is just a test to see what the same plot would look like in the Z direction
allVal = []
for r in rows:
    if r[-2] != 0 and r[-1] != 0:
        allVal.append(float(r[-1]) / r[-2])
color_norm = matplotlib.colors.Normalize(vmin=np.min(allVal), vmax=np.max(allVal), clip=False)
for i in sorted_z:
    x = []
    y = []
    val = []
    for r in rows:
        if r[-2] != 0 and r[-1] != 0 and r[2] == i:
            x.append(r[0])
            y.append(r[1])
            val.append(float(r[-1]) / r[-2])
    plt.scatter(x, y, s=10, c=val, norm=color_norm)
    plt.show()
In [61]:
#Is there a relationship between synapse count and unmasked?
#Is a high synapse count correlated with a high unmasked count?
#Yes! But this makes sense, because only voxels deemed to contain important information were
#unmasked; masked voxels probably held tissue unrelated to synapses
unmaskedNotZeroUnmask = []
valNotZeroUnmask = []
unmaskedNotZeroVal = []
valNotZeroVal = []
unmaskedBothNotZero = []
valBothNotZero = []
for r in rows:
    if r[-2] != 0:  # unmasked not zero
        unmaskedNotZeroUnmask.append(r[-2])
        valNotZeroUnmask.append(r[-1])
    if r[-1] != 0:  # synapse value not zero
        unmaskedNotZeroVal.append(r[-2])
        valNotZeroVal.append(r[-1])
    if r[-2] != 0 and r[-1] != 0:  # both not zero
        unmaskedBothNotZero.append(r[-2])
        valBothNotZero.append(r[-1])
#all data: correlate the unmasked and synapse columns across every row
corr = stats.pearsonr([r[-2] for r in rows], [r[-1] for r in rows])
print corr
#Unmasked is not zero
corr = stats.pearsonr(unmaskedNotZeroUnmask, valNotZeroUnmask)
print corr
#Value is not zero
corr = stats.pearsonr(unmaskedNotZeroVal, valNotZeroVal)
print corr
#Both are not zero
corr = stats.pearsonr(unmaskedBothNotZero, valBothNotZero)
print corr
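In [ ]:
#Pearson only measures linear association, so as a sketch of a robustness check (not part of
#the original analysis) we can also compute the Spearman rank correlation on the rows where
#both columns are nonzero.
rho, p = stats.spearmanr(unmaskedBothNotZero, valBothNotZero)
print "Spearman rho = " + str(rho) + ", p-value = " + str(p)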
In [62]:
import pandas as pd
#Summary statistics for each y layer, with and without accounting for unmasked
columns = ['Mean', 'Variance', 'Median', 'Min', 'Max', 'Mean Account For Unmasked',
           'Variance Account For Unmasked', 'Median Account For Unmasked',
           'Min Account For Unmasked', 'Max Account For Unmasked']
index = sorted_y
df = pd.DataFrame(index=index, columns=columns)
for i in sorted_y:
    syn = []
    synAccountUn = []
    for r in rows:
        if r[-2] != 0 and r[-1] != 0 and r[1] == i:
            synAccountUn.append(float(r[-1]) / r[-2])
            syn.append(r[-1])
    nextRow = [np.mean(syn), np.var(syn), np.median(syn), min(syn), max(syn),
               np.mean(synAccountUn), np.var(synAccountUn), np.median(synAccountUn),
               min(synAccountUn), max(synAccountUn)]
    df.loc[i] = nextRow
print df
In [ ]: